perm filename TEXHYF.SAI[TEX,SYS] blob
sn#554439 filedate 1981-01-08 generic text, type C, neo UTF8
COMMENT ⊗ VALID 00008 PAGES
C REC PAGE DESCRIPTION
C00001 00001
C00002 00002 begin comment TEX hyphenation routines
C00005 00003 procedure hyphenate(integer u,n)
C00021 00004 Initializing the exception table
C00031 00005 Initializing the suffix table
C00038 00006 Initializing the prefix table
C00044 00007 Initializing the consonant-pair table
C00046 00008 The driver program
C00047 ENDMK
C⊗;
begin comment TEX hyphenation routines;
comment This program was used to test TEX's hyphenation routines before
incorporating them verbatim into TEX itself. The tables and program may not be
quite up-to-date, TEXPRE and TEXSEM contain the current versions;
comment external procedure bail;
define bitsperwd=36;
require "⊂⊃⊂⊃" delimiters;
define # = ⊂;comment⊃;
define newline = ⊂('15&'12)⊃;
define thru = ⊂step 1 until⊃;
define flag=⊂(1 rot -1)⊃;
integer array mem[0:100];
define fs(f) = ⊂f⊃&"s" # field size of f, in bits;
define fd(f) = ⊂f⊃&"d" # field displacement of f, in bits;
define field(f,x) = ⊂ifc fd(f)=0 thenc ((x) land (2↑fs(f)-1))
elsec ifc fs(f)+fd(f)≥bitsperwd thenc ((x) lsh -fd(f))
elsec (((x) lsh -fd(f)) land (2↑fs(f)-1)) endc endc⊃ # field f of x;
define excepsize=337,sufsize=116,prefsize=109,btabsize=30 # hyphenation table sizes;
integer array exceptable[0:excepsize-1] # ordered hash table for exceptional words;
integer array excephyph[1:excepsize-1] # corresponding hyphenation patterns;
integer array suffix[0:sufsize-1] # interpretive commands for suffixes;
integer array prefix[0:prefsize-1] # interpretive commands for prefixes;
integer array btable[2:btabsize+1] # consonant-pair exception table;
procedure confusion;print("confusion");
integer finale # location of final "e" when the suffix routine starts
(temporarily set to 999999 if the suffix "ed" was just removed);
procedure hyphenate(integer u,n);
begin comment Assuming that mem[u]=0, mem[u+1]=a[i] for 1≤i≤n, mem[u+n+1]=0,
this procedure hyphenates the word a[1]...a[n] by setting mem[u+i]←0 when
a hyphen comes just before a[i], using TEX's hyphenation algorithm;
integer b,c,h,i,j,t,pc;
boolean firsttime;
label hashloop,phase2,sufbegin,interps,falsexx,marksuf,restarts,phase3,checkc,
restartp,interpp,marki,phase4c,vowelscan,phase4v,phase4vc,ertest,phase5,hashsearch;
comment People who don't like go to statements should not read this;
define o(c)=⊂"c" land '37⊃ # five-bit version of ascii character c;
finale←1000000 # infinity;
comment Phase 1. Search exception dictionary (an ordered hash table);
j← 7 min n;
hashsearch: t←mem[u+1];
for i←u+2 thru u+j do t←(t lsh 5)+mem[i];
h←t mod excepsize;
hashloop: while exceptable[h]>t do h←h-1;
if exceptable[h]≠t then
begin if h then
begin if j≠n or mem[u+n]≠o(s) then go to phase2;
j←j-1; go to hashsearch;
end;
h←excepsize-1; go to hashloop;
end;
comment Now the first 7 letters have been found in exceptable[h].
The corresponding hyphenation pattern appears in excephyph[h], but it
may be necessary to check more than seven letters to make sure the exception
applies. Additional letters to check appear at the righthand side of
excephyph[h], in a straightforward manner exhibited by the following code;
t←excephyph[h];
while t land '37 do
begin comment must check another letter;
j←j+1;
if mem[u+j]≠t land '37 then go to phase2;
t←t lsh -5;
end;
t←excephyph[h] land(flag ash(2-n)) # leftmost n-1 bits;
i←u+3;
while t do
begin if t<0 then mem[i]←0;
t←t lsh 1; i←i+1;
end;
go to phase5;
comment Phase 2. Interpretive routine for suffix removal.
The array suffix contains a "program" for a machine with the following
architecture. Instruction words have four fields, namely opcode, truex,
falsex, operand, each 9 bits. There are two registers: the program counter pc and
the character position i. There is also a toggle called firsttime.
Initially i=u+n-1, pc=mem[u+n], firsttime=true.
(Thus we begin by branching on the final character, mem[u+n].) The opcodes
are as follows, using t to stand for the operand field of the instruction:
scan. If mem[i]=t, decrease i by 1 and go to truex, else go to falsex.
double. Analogous, but tests if mem[i]=mem[i-1].
table. Analogous, but tests if mem[i]εsuffix[t], where xεy means that
word y shifted left x bits has a leading 1 bit.
check. Analogous, but tests if i>u+t and does not decrease i.
success. Sets mem[i+t+1]←0, stops.
fail. Stops.
repeat. Sets mem[i+t+1]←0, firsttime←false, i←i+t-1, pc←mem[i+1]. Thus,
the suffix routine is re-entered before the present suffix.*
again. If firsttime, sets firsttime←false, i←u+n-2, pc←mem[i+1]. Thus,
the suffix routine is re-entered with the final character omitted.*
Otherwise goes to truex.
mark. If t>0 or firsttime, sets mem[i+t+1]←0. Then goes to truex.
efail. (Special routine used to omit "ed".) If mem[u+n]="d" and
mem[u+n-1]="e", sets mem[u+n-1]←0, i←u+n-3, pc←mem[u+n-2]. Otherwise stops.
* Actually the suffix routine is reentered only when i≥u+3;
define opcodes=9,opcoded=27,truexs=9,truexd=18,falsexs=9,falsexd=9,oprands=9,
oprandd=0 # fields in interpreted instructions;
comment the above uses the fact that bitsperwd=36, much smaller fields would work;
define scan=0,double=1,table=2,check=3,success=4,fail=5,repeat=6,again=7,
mark=8,efail=9 # numeric equivalents of symbolic opcodes;
phase2: i←u+n-1; firsttime←true;
sufbegin: pc←mem[i+1]; if pc=o(e) then finale←i+1
else if finale=999999 then finale←i+2 else finale←1000000;
interps: case field(opcode,t←suffix[pc]) of begin
[scan] if(mem[i] xor t) land '37 then go to falsexx else i←i-1;
[double]if mem[i]≠mem[i-1] then go to falsexx else i←i-1;
[table] if(suffix[field(oprand,t)]lsh mem[i])≥0 then go to falsexx else i←i-1;
[check] if i≤u+field(oprand,t) then go to falsexx;
[success] begin mem[i+field(oprand,t)+1]←0; go to phase3 end;
[fail] go to phase3;
[repeat] begin i←i+field(oprand,t)-1; go to marksuf end;
[again] if firsttime then begin i←u+n-2; go to restarts end;
[mark] if (j←field(oprand,t)) or firsttime then mem[i+j+1]←0;
[efail] if mem[u+n]=o(d) and mem[u+n-1]=o(e) then
begin i←u+n-3; finale←999999; go to marksuf;
end
else go to phase3;
else confusion
end;
pc←field(truex,t); go to interps;
falsexx: pc←field(falsex,t); go to interps;
marksuf: mem[i+2]←0;
restarts: firsttime←false; if i≥u+3 then go to sufbegin;
comment Phase 3. Interpretive routine for prefix removal.
The array prefix contains a "program" for a machine with the following
architecture. Instruction words have four fields, namely opcode, truex,
falsex, operand, each 9 bits. There are two registers: the program counter pc and
the character position i. Initially i=u+2 and pc=mem[u+1].
(Thus we begin by branching on the first character, mem[u+1].) The opcodes
are as follows, using t to stand for the operand field of the instruction:
scan. If mem[i]=t, increase i by 1 and go to truex, else go to falsex.
repeat. Set i←i-t. If mem[i+1]=0, stop, otherwise set pc←mem[i],
mem[i]←0, i←i+1.
mark. If t>0 then set mem[i-t]←0. Also remember the value of mem[i],
for phase 4, then set mem[i]←0 (unless mem[i+1]=0) and stop.
table. If mem[i]ε(bit-pattern specified in truex,falsex,oprand fields)
then do a mark 0, otherwise just stop.
fail. Stop.
vow,cons. Stop.
Actually there are four flavors of stopping: One (vow) goes to phase 4 assuming
that mem[i-1] is a vowel, another (cons) goes to phase 4 with mem[i-1] ignored,
the third (fail) omits phase 4 entirely, the last (table when unsuccessful)
goes to phase 4 restarting at the beginning of the word;
define vow=success, cons=again # numeric versions of new opcodes;
phase3: pc←mem[u+1]; i←u+2;
restartp: c←pc; j←i-1;
interpp: case field(opcode,t←prefix[pc]) of begin
[scan]if(mem[i] xor t)land '37 then begin pc←field(falsex,t); go to interpp end
else begin i←i+1; pc←field(truex,t); go to interpp end;
[repeat] begin i←i-field(oprand,t)+1; if mem[i]=0 then go to phase5;
pc←mem[i-1]; mem[i-1]←0; go to restartp end;
[mark] begin if t←field(oprand,t) then mem[i-t]←0; go to marki end;
[table] if t lsh(mem[i]+opcodes)<0 then go to marki
else begin i←j; go to vowelscan end;
[fail] go to phase5;
[vow] go to phase4v;
[cons] go to phase4c;
else confusion
end;
comment Phase 4. This phase implements the consonant-pairs rule for middle
of words, as explained in the TEX writeup. Basically there are a few
special rules for double consonants and combining ch, gh, ph, sh, th into
single consonants, and then there are exceptional pairs of consonants between
which we will not break. There are two classes of exceptions, strong (like bl)
and weak (like ft). The necessary information is packed in btable, whose
words consist of three fields:
hchar specifies code for this character followed by letter h
weak specifies address of "weak" exception table for this character
leading 26 bits, give "strong"∨"weak" exception table
In order to keep hchar and weak to 3-bit fields, their values are encoded in
a straightforward manner that can be deduced by reading the following code;
define hchars=3,hchard=0,weaks=3,weakd=hchars # definition of btable fields;
marki: comment Now mark a permissible hyphen in mem[i] and do phase4 scanning;
if mem[i+1]=0 then go to phase5 # we don't allow only one letter between pref,suf;
if mem[i+1]=o(e) and mem[i+2]=o(d) and mem[i+3]=0 then go to phase5 # don't allow
syllable of form -<consonant>ed-;
c←mem[i]; mem[i]←0; go to vowelscan;
phase4c: c←mem[i];
vowelscan: comment We're looking for a vowel. Now c contains the letter
originally in mem[i], and suffix[0] is a table of vowels (including the null
code 0 as a vowel);
i←i+1; if(suffix[0] lsh c)≥0 then go to phase4c;
checkc: comment Now c is 0 if we've gone too far, else we've found a vowel;
if c=0 then go to phase5;
phase4v: b←mem[i]; i←i+1; if(suffix[0]lsh b)<0 then begin c←b;go to checkc;end;
comment Now b=mem[i-1] is a consonant following a vowel;
phase4vc: c←mem[i];
if b=o(q) and c=o(u) then begin i←i-1; go to marki end;
if(suffix[0] lsh c)<0 then begin i←i+1; go to checkc end;
if b=c then
begin comment double consonant;
if c≠o(l) and c≠o(s) then go to marki else
begin comment ll or ss, check for vowel;
if (c←mem[i+1])=0 then go to phase5;
if(suffix[0]lsh c)<0 then go to ertest;
i←i+2; go to phase4c;
end;
end
else if c=o(h) and j←field(hchar,btable[b]) then
begin comment change ch→e,gh→i,ph→o,sh→u,th→y;
b←b+j-2; i←i+1; go to phase4vc;
end
else if c=o(k) and b=o(c) then begin i←i+1; go to marki end;
if mem[i+1]=o(h) and j←field(hchar,btable[c]) then
begin comment change ch→e, etc., in second consonant position;
c←c+j-2; j←i+2;
end
else j←i+1 # Now j points to where we want a vowel;
if mem[j]=0 then go to phase5;
if(suffix[0] lsh mem[j])<0 then
begin comment vowel-consonant-consonant-vowel found;
if(btable[b] lsh (c-1))≥0 then go to marki # not an exception;
if(btable[field(weak,btable[b])+26] lsh(c-1))≥0 then
begin comment a strong exception;
i←j+1; go to phase4v;
end;
comment a weak exception; i←j-1;
if ((mem[i+1]=o(a) and mem[i+2]=o(g) and finale=i+3)
or (mem[i+1]=o(e) and mem[i+2]=o(s) and mem[i+3]=o(t)))
and mem[i+4]=0 then go to phase5 else go to ertest;
end;
comment three consonants in a row found;
i←j+1; go to phase4c;
ertest: if mem[i+1]=o(e) and mem[i+2]=o(r) and mem[i+3]=0
then go to phase5 else go to marki;
comment Phase 5. We're almost done! Although previous phases may have set mem[u+2]
or mem[u+n-1] or mem[u+n] to zero, we simply ignore this fact as we
output the answer;
phase5:
end;
comment Initializing the exception table;
procedure xent(string s) # enter an exception s;
begin integer n,m,c,w,t,i,j,h; string ss;
ss←s; n←0; w←0; m←0;
while c←lop(s) do
if c="-" then w←w lor 1 else
if c="*" then m←m+1 else
begin n←n+1; w←w lsh 1;
mem[n] ← c land '37;
end;
w←w rot(1-n);
j←7 min n;
while m do begin w←w+(mem[j+m]lsh(5*(m-1))); m←m-1 end;
t←mem[1];
for i←2 thru j do t←(t lsh 5)+mem[i];
h←t mod excepsize;
while t do
begin while exceptable[h]>t do h←h-1;
if h=0 then h←excepsize-1
else if exceptable[h]=t then
begin print("Whoops: double entry ",ss);
return;
end
else begin j←exceptable[h]; c←excephyph[h];
exceptable[h]←t; excephyph[h]←w;
t←j; w←c;
end;
end;
end;
preload_with
"ap-pre-ciable",
"con-trol-lable","un-con-trollable",
"flam-mable","in-flam-mable","in-es-ti-mable",
"for-mi-dable","por-table","im-preg-nable",
"eq-uable","in-sa-tiable","ne-go-tiable","so-ciable","turn-table","un-so-ciable",
"con-stable","stable","un-stable","work-table",
"de-pen-dent","in-de-pen-dent",
"any-thing","dar-ling","dump-ling","eve-ning","every-thing",
"far-thing","found-ling","ink-ling","main-spring","off-spring",
"play-thing","sap-ling","shoe-string","sib-ling","some-thing","star-ling",
"ster-ling","un-err-ing","up-swing","weak-ling","year-ling",
"in-fringed",
"civ-i-lize","crys-tal-lize","im-mo-bi-lize","mo-bi-lize",
"mo-nop-o-lize","sta-bi-li*ze","tan-ta-lize","un-civ-i-lized","uti-lize",
"pal-ate",
"in-clem-ent",
"bar-on-ess","li-on-ess",
"eu-logy","ped-a-gogy",
"lus-cious",
"met-al","non-metal","pet-al","postal","rent-al",
"cat-ion",
"com-bat-ive",
"stat-ure",
"beck-on","beck-oned","bes-tial",
"come-back","co-me-dian","comp-troller",
"cone-flower","co-nun-drum",
"equipped",
"handle-bar",
"inch-worm","ink-blot","inn-keeper",
"in-te-rior",
"min-is-ter","min-is-try",
"none-the-less",
"qua-drille",
"som-er-sault",
"su-pe-rior",
"tran-spire",
"una-nim-ity","unan-i-mous","unc-tuous",
"debt-or",
"ac-knowl-edge",
"de-duct-i*ble",
"vict-ual",
"nee-dle-work","idler",
"off-beat","off-hand","off-print","off-set","off-set-ting","off-shoot","off-shore",
"stiff-en",
"left-ist","left-over","lift-off",
"soft-hearted",
"egg-nog","egg-head",
"cognac","de-sign-er","for-eign-er","poi-gnant","vignette",
"hogs-head",
"child-ish","gold-en","hold-out","hold-over","hold-up",
"self-ish","self-adjoint","un-self-ish",
"bull-ish","crest-fallen","dis-till-*ery","fall-out","lull-aby","roll-away",
"sell-out","small-est","tall-est","wall-eye",
"psalm-ist",
"adult-hood",
"else-where","false-hood",
"volt-age",
"re-volv-er",
"beach-comb-er","bomb-er","climb-er","plumb-er",
"damp-en",
"hence-forth","mince-meat",
"bind-ery","bound-ary",
"fiend-ish","land-owner","out-land-ish","round-about","send-off","stand-out",
"change-over","hang-out","hang-over","orange-ade",
"ant-acid","ant-eater","count-ess","rep-re-sentative",
"ant-hill","pent-house","per-cent-*age",
"adapt-er","crypt-analysis",
"in-ter-ru*p*t-*i*ble",
"an-tiq-uity","in-eq-uity","in-iq-uity","liq-uefy","liq-uid",
"liq-ui-date","pre-req-ui-site","req-ui-sition",
"ubiq-ui-tous",
"herbal",
"arch-angel","re-search-ers",
"board-er","chordal","hard-en","hard-est","haz-ard-ous",
"re-cord-er","stan-dard-ize","stew-ard-ess","yard-age",
"non-con-form-ist",
"cav-ern-ous","dis-cern-ible","mod-ern-ize","turn-about","turn-over",
"west-ern-ize",
"harp-ist","sharp-en",
"ir-re-ver*s-ible","nurse-maid","re-hears-al",
"re-vers-i*ble","wors-en",
"art-ist","as-sert-i*ve","con-vert-ible","court-yard","fore-short-en","heart-ache",
"heart-ily","short-en",
"apart-heid","court-house","earth-en-ware","north-east","north-ern","port-hole",
"ob-serv-er","serv-er",
"pre-school",
"con-de-scend","cre-scen*do","de-cre-scendo","de-scen-dent","de-scent",
"om-ni-scient","pleb-i-scite","re-scind","sea-scape",
"askance","snake-skin","whisk-er",
"cole-slaw",
"rattle-snake",
"class-room","cross-over","dis-miss-al","ex-press-*i*ble",
"less-en","toss-up","un-class-i-fied",
"ar-mi-stice","astig-ma-tism","astir","blast-off","by-stand-er",
"candle-stick","cast-away","cast-off","co-star",
"di-gest-i*ble","east-ern","fore-stall",
"in-di-ges*t-*i*ble","in-ex-haust-ible","ir-re-sist-ible",
"life-style","lime-stone","live-stock","mile-stone",
"pho-to-stat","re-start-ed","re-state-ment","re-store","shy-ster",
"side-step","smoke-stack","sug-gest-*i*ble","thermo-stat","waste-bas-ket",
"waste-land",
"mast-head","post-hu-mous","priest-hood",
"side-swipe",
"watt-meter",
"be-tween",
"kib-itzer",
"buzz-er",
"al-go-rithm","bib-li-ography","bi-no-mial","cat-e-go-ry",
"cen-ter","com-put-a*bil-ity",
"dec-la-ra-tion","de-gree","de-vel-op-ment",
"es-tab-lish","hap-hazard","neg-li-gible","pe-ri-odic",
"poly-no-mial","pre-vious","pro-ce-dure","prob-abil-ity",
"prob-lem-atic","pro-gram-ming","pub-li-ca-tion","pub-lish","ref-er-ence",
"re-place-ment","sub-sequ*e*n*ce","when-ever",
""; string array exceptions[0:excepsize-1];
procedure initex;
begin integer i; string s;
arrclr(exceptable); arrclr(excephyph);
i←0;
while s←exceptions[i] do
begin xent(s); i←i+1;
end;
print("Exception table contains ",i," entries in ordered hash table",
" of size ",excepsize,".");
end;
comment Initializing the suffix table;
procedure initsuf;
begin
define opcodes=9,opcoded=27,truexs=9,truexd=18,falsexs=9,falsexd=9,oprands=9,
oprandd=0 # fields in interpreted instructions;
define scan=0,double=1,table=2,check=3,success=4,fail=5,repeat=6,again=7,
mark=8,efail=9 # numeric equivalents of symbolic opcodes;
define s(n,a,b,c,d)=⊂suffix[n]←(a lsh opcoded)+(b lsh oprandd)+
(c lsh truexd)+(d lsh falsexd)⊃;
define t(c)=⊂(flag lsh -("c" land '37))⊃;
suffix[0]←flag+t(a)+t(e)+t(i)+t(o)+t(u)+t(y);
s(1,fail,0,0,0) # a;
s(2,fail,0,0,0) # b;
s(3,scan,"i",34,1) # c;
s(4,again,0,1,0) # d;
s(5,mark,0,38,0) # e;
s(6,fail,0,0,0) # f;
s(7,scan,"n",60,1) # g;
s(8,fail,0,0,0) # h;
s(9,fail,0,0,0) # i;
s(10,fail,0,0,0) # j;
s(11,fail,0,0,0) # k;
s(12,scan,"a",71,72) # l;
s(13,fail,0,0,0) # m;
s(14,scan,"o",77,1) # n;
s(15,fail,0,0,0) # o;
s(16,fail,0,0,0) # p;
s(17,fail,0,0,0) # q;
s(18,scan,"e",81,1) # r;
s(19,mark,0,85,0) # s;
s(20,scan,"n",94,1) # t;
s(21,fail,0,0,0) # u;
s(22,fail,0,0,0) # v;
s(23,fail,0,0,0) # w;
s(24,fail,0,0,0) # x;
s(25,scan,"l",109,98) # y;
s(26,efail,0,0,0) # z;
s(27,success,0,0,0);
s(28,success,1,0,0);
s(29,success,2,0,0);
s(30,success,3,0,0);
s(31,repeat,0,0,0);
s(32,repeat,1,0,0);
s(33,repeat,2,0,0);
s(34,scan,"p",35,26) # e/ic;
s(35,scan,"o",36,26) # pe/pic;
s(36,scan,"c",37,26) # ope/opic;
s(37,scan,"s",27,26) # cope/copic;
s(38,scan,"l",39,40) # e;
s(39,scan,"b",41,26) # le;
s(40,scan,"t",42,43) # e;
s(41,scan,"a",44,26) # ble;
s(42,scan,"a",45,26) # te;
s(43,scan,"z",46,47) # e;
s(44,scan,"t",48,49) # able/ably;
s(45,table,50,108,26) # ate;
s(46,scan,"i",51,26) # ze;
s(47,scan,"v",52,53) # e;
s(48,table,54,26,32) # table;
s(49,table,107,26,31) # able;
suffix[50]←t(c)+t(l);
s(51,scan,"l",32,26) # ize;
s(52,scan,"i",55,26) # ve;
s(53,scan,"r",56,34) # e;
suffix[54]←t(e)+t(i)+t(o)+t(u)+t(t);
s(55,scan,"t",27,26) # ive/ure;
s(56,scan,"u",55,57) # re;
s(57,scan,"e",58,26) # re;
s(58,scan,"h",59,26) # ere;
s(59,scan,"p",37,26) # here;
s(60,scan,"i",61,1) # ng;
s(61,check,3,62,110) # ing;
s(62,scan,"l",63,64) # ing;
s(63,table,65,27,66) # ling;
s(64,table,67,28,68) # ing;
suffix[65]←t(b)+t(c)+t(d)+t(f)+t(g)+t(p)+t(t)+t(z);
s(66,scan,"k",69,28) # ling;
suffix[67]←t(f)+t(s)+t(z);
s(68,table,0,28,70) # ing;
s(69,scan,"c",29,27) # kling;
s(70,double,0,27,27) # ing;
s(71,scan,"i",73,74) # al;
s(72,scan,"u",75,1) # l;
s(73,scan,"t",27,76) # al/ial;
s(74,scan,"n",14,73) # al;
s(75,scan,"f",31,1) # ul;
s(76,scan,"c",27,1) # al/ial/ient;
s(77,scan,"i",78,1) # on/onal;
s(78,table,79,80,1) # ion/ional;
suffix[79]←t(s)+t(t);
s(80,mark,4,27,0) # sion/sional/tion/tional;
s(81,scan,"h",82,1) # er/y;
s(82,scan,"p",83,1) # her/hy;
s(83,scan,"a",84,1) # pher/phy;
s(84,scan,"r",27,1) # apher/aphy;
s(85,scan,"u",86,87) # s;
s(86,scan,"o",88,4) # us;
s(87,scan,"s",89,4) # s;
s(88,scan,"i",90,4) # ous;
s(89,scan,"e",91,4) # ss;
s(90,scan,"c",92,4) # ious;
s(91,table,93,31,4) # ess;
s(92,scan,"s",27,27) # cious;
suffix[93]←t(l)+t(n);
s(94,scan,"e",95,1) # nt;
s(95,scan,"m",31,96) # ent;
s(96,scan,"d",27,97) # ent;
s(97,scan,"i",76,1) # ent;
s(98,scan,"g",99,100) # y;
s(99,scan,"o",27,1) # gy;
s(100,scan,"r",101,81) # y;
s(101,scan,"a",102,1) # ry;
s(102,scan,"n",103,1) # ary;
s(103,scan,"o",104,27) # nary;
s(104,scan,"i",106,28) # onary;
suffix[105]←t(b)+t(c)+t(d)+t(f)+t(g)+t(h)+t(j)+t(k)+t(l)+t(m)+t(n)+t(p)+t(q)+
t(r)+t(s)+t(t)+t(v)+t(w)+t(x)+t(z);
s(106,repeat,3,0,0) # ionary;
suffix[107]←t(c)+t(f)+t(g)+t(p)+t(r);
s(108,table,0,28,26) # cate/late;
s(109,scan,"b",115,31) # ly;
s(110,check,1,111,1) # ing;
s(111,table,105,112,27) # ing;
s(112,table,105,113,28) # <cons>ing;
s(113,check,0,114,1) # <cons><cons>ing;
s(114,table,105,1,29) # <cons><cons>ing;
s(115,scan,"a",44,32) # bly;
end;
comment Initializing the prefix table;
procedure initpref;
begin
define opcodes=9,opcoded=27,truexs=9,truexd=18,falsexs=9,falsexd=9,oprands=9,
oprandd=0 # fields in interpreted instructions;
define scan(n,c,t,f)=⊂prefix[n]←"c"+(t lsh truexd)+(f lsh falsexd)⊃;
define repeat(n,t)=⊂prefix[n]←(6 lsh opcoded)+t⊃;
define mark(n,t)=⊂prefix[n]←(8 lsh opcoded)+t⊃;
define table(n)=⊂prefix[n]←(2 lsh opcoded)⊃;
define fayl(n)=⊂prefix[n]←5 lsh opcoded⊃;
define vow(n)=⊂prefix[n]←4 lsh opcoded⊃;
define cons(n)=⊂prefix[n]←7 lsh opcoded⊃;
define t(c)=⊂(flag lsh -(("c" land '37)+opcodes))⊃;
define vs=1,cs=6,ts=7 # locations where there is a "vow","cons","table0" inst;
fayl(0) # in case mem[u+1] gets set to zero by the suffix routine;
vow(1) # a;
scan(2,e,34,cs) # b;
scan(3,o,36,cs) # c;
scan(4,i,38,cs) # d;
scan(5,q,41,44) # e;
cons(6) # f;
table(7) # g;
scan(8,a,45,47) # h;
scan(9,m,27,55) # i;
cons(10) # j;
cons(11) # k;
scan(12,e,61,cs) # l;
scan(13,a,63,70) # m;
scan(14,o,76,cs) # n;
scan(15,u,77,78) # o;
scan(16,s,81,cs) # p;
scan(17,u,85,cs) # q;
cons(18) # r;
scan(19,e,87,89) # s;
scan(20,h,97,99) # t;
scan(21,n,106,vs) # u;
cons(22) # v;
cons(23) # w;
cons(24) # x;
vow(25) # y;
cons(26) # z;
repeat(27,0);
repeat(28,1);
repeat(29,2);
mark(30,0);
mark(31,1);
mark(32,2);
mark(33,3);
table(34)+t(c)+t(f)+t(h)+t(s)+t(w) # be;
scan(35,i,vs,27) # un;
scan(36,m,30,37) # co;
scan(37,n,30,vs) # co;
scan(38,s,39,vs) # di;
scan(39,h,ts,40) # dis;
scan(40,y,vs,27) # dis;
scan(41,u,42,cs) # eq;
scan(42,i,43,cs) # equ;
scan(43,v,30,30) # equi;
scan(44,x,30,vs) # e;
scan(45,n,46,vs) # ha;
scan(46,d,30,ts) # han;
scan(47,o,48,51) # h;
scan(48,r,49,vs) # ho;
scan(49,s,50,ts) # hor;
scan(50,e,30,ts) # hors;
scan(51,y,52,cs) # h;
scan(52,p,53,vs) # hy;
scan(53,e,54,ts) # hyp;
scan(54,r,33,vs) # hype;
scan(55,n,56,vs) # i;
scan(56,t,57,27) # in;
scan(57,e,58,59) # int;
scan(58,r,33,29) # inte;
scan(59,r,60,28) # int;
scan(60,o,33,29) # intr;
scan(61,x,62,vs) # le;
scan(62,i,31,ts) # lex/max/min;
scan(63,c,64,66) # ma;
scan(64,r,65,ts) # mac;
scan(65,o,32,ts) # macr;
scan(66,t,67,69) # ma;
scan(67,h,68,ts) # mat;
scan(68,e,31,ts) # math;
scan(69,x,62,vs) # ma;
scan(70,i,71,72) # m;
scan(71,n,62,vs) # mi;
scan(72,u,73,cs) # m;
scan(73,l,74,vs) # mu;
scan(74,t,75,ts) # mul;
scan(75,i,32,ts) # mult;
scan(76,n,27,vs) # no;
scan(77,t,30,vs) # ou;
scan(78,v,79,vs) # o;
scan(79,e,80,ts) # ov;
scan(80,r,27,vs) # ove;
scan(81,e,82,cs) # ps;
scan(82,u,83,vs) # pse;
scan(83,d,84,vs) # pseu;
scan(84,o,32,ts) # pseud;
scan(85,a,86,cs) # qu;
scan(86,d,30,vs) # qua;
scan(87,m,88,vs) # se;
scan(88,i,30,ts) # sem;
scan(89,o,90,92) # s;
scan(90,m,91,vs) # so;
scan(91,e,30,ts) # som/ther;
scan(92,u,93,cs) # s;
scan(93,b,30,94) # su;
scan(94,p,95,vs) # su;
scan(95,e,96,ts) # sup;
scan(96,r,33,vs) # supe;
scan(97,e,98,cs) # th;
scan(98,r,91,vs) # the;
scan(99,r,100,cs) # t;
scan(100,a,101,104) # tr;
scan(101,n,102,vs) # tra;
scan(102,s,103,ts) # tran;
table(103)+t(a)+t(f)+t(g)+t(l)+t(m)+t(p)+t(s)+t(v) # trans;
scan(104,i,105,cs) # tr;
table(105)+t(a)+t(f)+t(u) # tri;
scan(106,d,107,35) # un;
scan(107,e,108,28) # und;
scan(108,r,33,29) # unde;
end;
comment Initializing the consonant-pair table;
procedure initb # sets btable;
begin
define hchars=3,hchard=0,weaks=3,weakd=3 # definition of btable fields;
define t(c)=⊂(flag lsh -(("c" land '37)-1))⊃;
define weak(n)=⊂(n lsh weakd) lor btable[26+n]⊃;
define b(n)=⊂btable[n]←0⊃;
b(26) # weak(0) and z;
b(27)+t(t) # weak(1), for f and s;
b(28)+t(d) # weak(2), for l;
b(29)+t(p) # weak(3), for m;
b(30)+t(d)+t(g)+t(s)+t(t) # weak(4), for n;
b(31)+t(g)+t(m)+t(n)+t(t) # weak(5), for r;
b(2)+t(l)+t(r) # b;
b(3)+t(l)+t(r)+4 # c;
b(4)+t(g)+t(r) # d;
b(5)+t(l)+t(r) # ch;
b(6)+t(l)+t(r)+weak(1) # f;
b(7)+t(l)+t(r)+4 # g;
b(8) # h;
b(9)+t(t) # gh;
b(10) # j;
b(11)+t(n) # k;
b(12)+t(k)+t(q)+weak(2) # l;
b(13)+weak(3) # m;
b(14)+t(e)+t(k)+t(x)+weak(4) # n;
b(15)+t(r) # ph;
b(16)+t(l)+t(r)+1 # p;
b(17) # q;
b(18)+t(k)+weak(5) # r;
b(19)+t(p)+t(q)+weak(1)+4 # s;
b(20)+t(e)+t(r)+7 # t;
b(21) # sh;
b(22) # v;
b(23)+t(h)+t(l)+t(n)+t(r) # w;
b(24) # x;
b(25)+t(r) # th;
end;
comment The driver program;
integer u,n,c; string s,ss;
initex;initsuf;initpref;initb;
u←3;
while true do
begin print(newline,": ");
s←inchwl;
if s=0 then done;
ss←s; n←0; mem[u]←0;
while c←lop(ss) do begin n←n+1; mem[u+n]←c land '37; end;
mem[u+n+1]←0;
if n<4 then print(s) else
begin hyphenate(u,n);
mem[u+1]←mem[u+2]←mem[u+n-1]←mem[u+n]←1;
n←0; while c←lop(s) do
begin n←n+1;
if mem[u+n]=0 and(u+n+2<finale or u+n>finale)
then print("-"&c) else print(null&c);
end;
end;
end;
end